library(GENIE3)
library(doParallel)
library(igraph)
library(tidyverse)
library(DT)
library(reticulate)
library(learn2count)
library(rbenchmark)
library(reshape2)
library(gridExtra)
library(DiagrammeR)
library(pROC)
library(JRF)
library(DiagrammeRsvg)
library(rsvg)
library(RColorBrewer)
library(rbenchmark)
library(ZILGM)
library(patchwork)
library(scales)
library(INetTool)
# ---- Environment setup ----
# Bind reticulate to the system Python 3 interpreter; `required = TRUE` makes
# reticulate fail instead of silently falling back to another interpreter.
use_python("/usr/bin/python3", required = TRUE)

# Python modules imported through reticulate.
arboreto <- import("arboreto.algo")
pandas <- import("pandas")
numpy <- import("numpy")

# Named list filled in by each timed inference step below.
execution_times <- list()

# Load the project helper scripts, in the same order as before.
# source() evaluates in the global environment by default (local = FALSE),
# so calling it through lapply() defines the helpers globally as well.
invisible(lapply(
  c(
    "generate_adjacency.R",
    "symmetrize.R",
    "pscores.R",
    "plotg.R",
    "compare_consensus.R",
    "create_consensus.R",
    "earlyj.R",
    "plotROC.R",
    "cutoff_adjacency.R",
    "infer_networks.R"
  ),
  source
))
# ---- Workflow flowchart ----
# Build the Graphviz (DOT) diagram of the analysis workflow, export it to PNG
# and display it. Label typos were fixed ("Thruth" -> "Truth") and the
# plotting node now names the helper script that is actually sourced
# (compare_consensus.R).
grViz_output <- DiagrammeR::grViz("
digraph biological_workflow {
# Set up the graph attributes
graph [layout = dot, rankdir = TB]
# Define consistent node styles
node [shape = rectangle, style = filled, color = lightblue, fontsize = 12]
# Define nodes for each step
StartNode [label = 'Ground Truth - String Regulatory Network', shape = oval, color = seagreen, fontcolor = black]
AdjacencyMatrix [label = 'Truth Adjacency Matrix', shape = rectangle, color = seagreen]
SimulateData [label = 'Simulate Single-Cell Data', shape = rectangle, color = goldenrod]
# Integration strategies
LateIntegration [label = 'Late\nIntegration', shape = oval, color = khaki]
EarlyIntegration [label = 'Early\nIntegration', shape = oval, color = khaki]
Jointanalysis [label = 'Joint\nanalysis', shape = oval, color = khaki]
# Process
earlyj [label = 'earlyj.R', shape=diamond, color=lightblue, fontcolor=black]
networkinference [label = 'infer_networks.R\nGENIE3\nGRNBoost2\nZILGM\nJRF', shape = rectangle, color = goldenrod, fontcolor=black]
symmetrize [label = 'symmetrize.R', shape = rectangle, color = goldenrod, fontcolor=black]
plotROC [label = 'plotROC.R', shape=diamond, color=lightblue, fontcolor=black]
generateadjacency [label='generate_adjacency.R\nWeighted Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
cutoffadjacency [label='cutoff_adjacency.R\nBinary Adjacency', shape=rectangle, color=goldenrod, fontcolor=black]
pscores [label='pscores.R\nTPR\nFPR\nF1\nAccuracy\nPrecision', shape=diamond, color=lightblue, fontcolor=black]
voting [label='Voting\nUnion\nIntersection', shape=diamond, color=lightblue, fontcolor=black]
plotgcompare [label='plotg.R\ncompare_consensus.R\nPlot Graphs', shape=rectangle, color=goldenrod, fontcolor=black]
# Define the workflow structure
StartNode -> AdjacencyMatrix
AdjacencyMatrix -> SimulateData
SimulateData -> LateIntegration
SimulateData -> EarlyIntegration
SimulateData -> Jointanalysis
EarlyIntegration -> earlyj
earlyj -> networkinference
LateIntegration -> networkinference
Jointanalysis -> networkinference
networkinference -> symmetrize
symmetrize -> plotROC
symmetrize -> generateadjacency
generateadjacency -> cutoffadjacency
cutoffadjacency -> pscores
cutoffadjacency -> voting
voting -> plotgcompare
voting -> pscores
}
")

# Convert the widget to SVG text, then rasterize it to a PNG file.
svg_code <- export_svg(grViz_output)
rsvg::rsvg_png(charToRaw(svg_code), "./../analysis/flowchart.png")

# Display the interactive diagram.
grViz_output
# ---- Ground-truth network ----
# Read the ground-truth adjacency matrix (first CSV column = row names) and
# remove self-loops. `TRUE`/`FALSE` are spelled out because `T`/`F` are
# ordinary variables that can be reassigned.
# NOTE(review): read.table() defaults to check.names = TRUE, which rewrites
# column names that are not syntactic R names (e.g. names containing "-")
# while leaving row names untouched -- confirm row/column names still agree.
adjm <- read.table(
  "./../data/allblood_adjacency_matrix.csv",
  header = TRUE,
  row.names = 1,
  sep = ","
) %>%
  as.matrix()
diag(adjm) <- 0

# Interactive table of the ground truth with CSV/Excel export buttons.
adjm %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "Ground Truth"
  )

# Undirected igraph object built from the adjacency matrix (no self-loops).
gtruth <- igraph::graph_from_adjacency_matrix(
  adjm,
  mode = "undirected",
  diag = FALSE
)
num_nodes <- vcount(gtruth)
num_edges <- ecount(gtruth)

# Seed set before plotting so the Fruchterman-Reingold layout is reproducible.
set.seed(1234)
plot(
  gtruth,
  main = paste("Ground Truth\nNodes:", num_nodes, "Edges:", num_edges),
  vertex.label.color = "black",
  vertex.size = 6,
  edge.width = 2,
  vertex.label = NA,
  vertex.color = "steelblue",
  layout = igraph::layout_with_fr
)
# ---- Simulate single-cell count data ----
# Three count matrices are simulated from the ground-truth network with
# learn2count's simdata() (family "ZINB"), one per (mu, theta) pair.
ncell <- 500
nodes <- nrow(adjm)
set.seed(1130)
mu_values <- c(3, 6, 9)
theta_values <- c(1, 0.7, 0.5)

count_matrices <- lapply(seq_along(mu_values), function(idx) {
  # Each simulation gets its own seed so matrices are reproducible
  # independently of each other.
  set.seed(1130 + idx)
  simulated <- simdata(
    n = ncell, p = nodes, B = adjm, family = "ZINB",
    mu = mu_values[idx], mu_noise = 1, theta = theta_values[idx], pi = 0.2
  )
  # Cells in rows ("cell1", "cell2", ...), genes in columns (named as adjm).
  sim_df <- as.data.frame(simulated)
  colnames(sim_df) <- colnames(adjm)
  rownames(sim_df) <- paste0("cell", seq_len(nrow(sim_df)))
  sim_df
})

# Show the first simulated matrix.
datatable(
  count_matrices[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "Simulated count matrix"
)

# Persist the simulated matrices for later reuse.
saveRDS(count_matrices, "./../analysis/count_matrices.RDS")
# ---- GENIE3, late integration ----
# Infer one network per count matrix, then combine them afterwards.
set.seed(1234)
tictoc::tic("GENIE3 late 15Cores")
genie3_late <- infer_networks(count_matrices, method = "GENIE3")
saveRDS(genie3_late, "./../analysis/genie3_late.RDS")
# toc() returns the start (`tic`) and end (`toc`) timestamps; the elapsed
# time is their difference. Storing `$toc` alone would record the session
# clock at the end of the step, not the duration.
execution_times[["GENIE3 late 15Cores"]] <- local({
  timing <- tictoc::toc(log = TRUE)
  unname(timing$toc - timing$tic)
})
## GENIE3 late 15Cores: 371.094 sec elapsed

# Raw GENIE3 output for the first matrix.
genie3_late[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GENIE3 output"
  )

# Weighted adjacency matrices, symmetrized with the "mean" weight function,
# then ROC/AUC against the ground truth (all project helpers).
genie3_late_wadj <- generate_adjacency(genie3_late, ground.truth = adjm)
sgenie3_late_wadj <- symmetrize(genie3_late_wadj, weight_function = "mean")
genie3_late_auc <- plotROC(
  sgenie3_late_wadj, adjm,
  plot_title = "ROC curve - GENIE3 Late Integration"
)

sgenie3_late_wadj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GENIE3 symmetrize output"
  )

# Threshold the weighted matrices with the project's cutoff_adjacency() helper.
sgenie3_late_adj <- cutoff_adjacency(
  count_matrices = count_matrices,
  weighted_adjm_list = sgenie3_late_wadj,
  ground.truth = adjm,
  n = 3,
  method = "GENIE3"
)
## Matrix 1 Mean 95th Percentile Cutoff: 0.004953592
## Matrix 2 Mean 95th Percentile Cutoff: 0.004929168
## Matrix 3 Mean 95th Percentile Cutoff: 0.004974256

sgenie3_late_adj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GENIE3 adjacency"
  )

# Performance scores of each individual network.
scores.genie3.late.all <- pscores(adjm, sgenie3_late_adj)
scores.genie3.late.all$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
plots <- plotg(sgenie3_late_adj)

# Consensus networks: vote, union and INet.
consesusm <- create_consensus(sgenie3_late_adj, method = "vote")
consesusu <- create_consensus(sgenie3_late_adj, method = "union")
consesunet <- create_consensus(
  adj_matrix_list = sgenie3_late_adj,
  weighted_list = sgenie3_late_wadj,
  method = "INet",
  threshold = 0.05,
  ncores = 15
)
## [1] 0.3226695
## [1] 0.05773287

scores.genie3.late <- pscores(adjm, list(consesusm))
scoresu.genie3.late <- pscores(adjm, list(consesusu))
scoresnet.genie3.late <- pscores(adjm, list(consesunet))

scores.genie3.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores vote"
  )
# Fixed: this table previously re-printed the vote scores under the
# "scores union" caption.
scoresu.genie3.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores union"
  )
scoresnet.genie3.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores INet"
  )

# Compare each consensus with the ground truth; `ajm_compared` is overwritten
# on every call, so only the last result is kept.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)
ajm_compared <- compare_consensus(consesunet, adjm)
# ---- GRNBoost2, late integration ----
# Infer one network per count matrix, then combine them afterwards.
set.seed(1234)
tictoc::tic("GRNBoost2 late")
grnb_late <- infer_networks(count_matrices, method = "GRNBoost2")
saveRDS(grnb_late, "./../analysis/grnb_late.RDS")
# Elapsed time is toc - tic; `$toc` alone is only the end timestamp.
execution_times[["GRNBoost2 late"]] <- local({
  timing <- tictoc::toc(log = TRUE)
  unname(timing$toc - timing$tic)
})
## GRNBoost2 late: 14.569 sec elapsed

# Raw GRNBoost2 output for the first matrix.
grnb_late[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GRNBoost2 output"
  )

# Weighted adjacency matrices, symmetrized with the "mean" weight function,
# then ROC/AUC against the ground truth (all project helpers).
grnb_late_wadj <- generate_adjacency(grnb_late, ground.truth = adjm)
sgrnb_late_wadj <- symmetrize(grnb_late_wadj, weight_function = "mean")
grnb_late_auc <- plotROC(
  sgrnb_late_wadj, adjm,
  plot_title = "ROC curve - GRNBoost2 Late Integration"
)

sgrnb_late_wadj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GRNBoost2 symmetrize output"
  )

# Threshold the weighted matrices with the project's cutoff_adjacency() helper.
sgrnb_late_adj <- cutoff_adjacency(
  count_matrices = count_matrices,
  weighted_adjm_list = sgrnb_late_wadj,
  ground.truth = adjm,
  n = 3,
  method = "GRNBoost2"
)
## Matrix 1 Mean 95th Percentile Cutoff: 0.5552089
## Matrix 2 Mean 95th Percentile Cutoff: 0.5631445
## Matrix 3 Mean 95th Percentile Cutoff: 0.5670532

sgrnb_late_adj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GRNBoost2 adjacency"
  )

# Performance scores of each individual network.
scores.grn.late.all <- pscores(adjm, sgrnb_late_adj)
scores.grn.late.all$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
plots <- plotg(sgrnb_late_adj)

# Consensus networks: vote, union and INet.
consesusm <- create_consensus(sgrnb_late_adj, method = "vote")
consesusu <- create_consensus(sgrnb_late_adj, method = "union")
consesunet <- create_consensus(
  adj_matrix_list = sgrnb_late_adj,
  weighted_list = sgrnb_late_wadj,
  method = "INet",
  threshold = 0.05,
  ncores = 15
)
## [1] 0.3041232
## [1] 0.0319101

scores.grn.late <- pscores(adjm, list(consesusm))
scoresu.grn.late <- pscores(adjm, list(consesusu))
scoresnet.grn.late <- pscores(adjm, list(consesunet))

scores.grn.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores vote"
  )
scoresu.grn.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores union"
  )
scoresnet.grn.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores INet"
  )

# Compare each consensus with the ground truth; `ajm_compared` is overwritten
# on every call, so only the last result is kept.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)
ajm_compared <- compare_consensus(consesunet, adjm)
# ---- ZILGM, late integration ----
# Infer one network per count matrix. The recorded console output below has
# been re-prefixed with `##`: several lines had lost the prefix, which made
# the script unparseable.
set.seed(1234)
tictoc::tic("ZILGM late 15Cores")
zilgm_late <- infer_networks(
  count_matrices_list = count_matrices,
  method = "ZILGM",
  adjm = adjm
)
## learning for NBII graphical model
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress : 10 %
## Conducting sampling in progress : 20 %
## Conducting sampling in progress : 30 %
## Conducting sampling in progress : 40 %
## Conducting sampling in progress : 50 %
## Conducting sampling in progress : 60 %
## Conducting sampling in progress : 70 %
## Conducting sampling in progress : 80 %
## Conducting sampling in progress : 90 %
## Conducting sampling in progress : 100 %
## learning for NBII graphical model
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress : 10 %
## Conducting sampling in progress : 20 %
## Conducting sampling in progress : 30 %
## Conducting sampling in progress : 40 %
## Conducting sampling in progress : 50 %
## Conducting sampling in progress : 60 %
## Conducting sampling in progress : 70 %
## Conducting sampling in progress : 80 %
## Conducting sampling in progress : 90 %
## Conducting sampling in progress : 100 %
## learning for NBII graphical model
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress : 10 %
## Conducting sampling in progress : 20 %
## Conducting sampling in progress : 30 %
## Conducting sampling in progress : 40 %
## Conducting sampling in progress : 50 %
## Conducting sampling in progress : 60 %
## Conducting sampling in progress : 70 %
## Conducting sampling in progress : 80 %
## Conducting sampling in progress : 90 %
## Conducting sampling in progress : 100 %
## Assigned lamb to lambda_results[[ 1 ]] with 50 matrices.
## Assigned lamb to lambda_results[[ 2 ]] with 50 matrices.
## Assigned lamb to lambda_results[[ 3 ]] with 50 matrices.
saveRDS(zilgm_late, "./../analysis/zilgm_late.RDS")
# Elapsed time is toc - tic; `$toc` alone is only the end timestamp.
execution_times[["ZILGM late 15Cores"]] <- local({
  timing <- tictoc::toc(log = TRUE)
  unname(timing$toc - timing$tic)
})
## ZILGM late 15Cores: 6802.724 sec elapsed

# `network_results`: one estimated graph per count matrix.
# `lambda_results`: per count matrix, one matrix per lambda value.
est_graphs <- zilgm_late$network_results
lambdas <- zilgm_late$lambda_results

# ROC over the lambda path of each count matrix.
plotROC(lambdas[[1]], adjm, plot_title = "Lambda ROC Matrix1", is_binary = TRUE)
plotROC(lambdas[[2]], adjm, plot_title = "Lambda ROC Matrix2", is_binary = TRUE)
plotROC(lambdas[[3]], adjm, plot_title = "Lambda ROC Matrix3", is_binary = TRUE)

# For every lambda index, vote across the count matrices. The number of
# lambdas and of matrices is taken from the data instead of being hard-coded
# to 50 and 3.
consensus_matrices <- lapply(seq_along(lambdas[[1]]), function(i) {
  same_lambda <- unname(lapply(lambdas, function(lambda_path) lambda_path[[i]]))
  create_consensus(same_lambda, method = "vote")
})
zilgm_late_auc <- plotROC(
  consensus_matrices, adjm,
  plot_title = "ROC voting lambda",
  is_binary = TRUE
)

# Performance scores of each individual network.
scores.zilgm.late.all <- pscores(adjm, est_graphs)
scores.zilgm.late.all$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
plots <- plotg(est_graphs)

# Consensus networks: vote and union.
consesusm <- create_consensus(est_graphs, method = "vote")
consesusu <- create_consensus(est_graphs, method = "union")
scores.zilgm.late <- pscores(adjm, list(consesusm))
scoresu.zilgm.late <- pscores(adjm, list(consesusu))

# Captions now distinguish the two tables, matching the other methods.
scores.zilgm.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores vote"
  )
scoresu.zilgm.late$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores union"
  )

# `ajm_compared` is overwritten on every call; only the last result is kept.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)
# ---- GENIE3, early integration ----
# earlyj() (project helper) combines the count matrices into one; it is
# wrapped in a list because the downstream helpers take a list of matrices.
early_matrix <- list(earlyj(count_matrices))

set.seed(1234)
tictoc::tic("GENIE3 early 15Cores")
genie3_early <- infer_networks(early_matrix, method = "GENIE3")
# Elapsed time is toc - tic; `$toc` alone is only the end timestamp.
execution_times[["GENIE3 early 15Cores"]] <- local({
  timing <- tictoc::toc(log = TRUE)
  unname(timing$toc - timing$tic)
})
## GENIE3 early 15Cores: 453.214 sec elapsed
saveRDS(genie3_early, "./../analysis/genie3_early.RDS")

# Raw GENIE3 output.
genie3_early[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GENIE3 output"
  )

# Weighted adjacency matrix, symmetrized with the "mean" weight function,
# then ROC/AUC against the ground truth (all project helpers).
genie3_early_wadj <- generate_adjacency(genie3_early, ground.truth = adjm)
sgenie3_early_wadj <- symmetrize(genie3_early_wadj, weight_function = "mean")
genie3_early_auc <- plotROC(
  sgenie3_early_wadj, adjm,
  plot_title = "ROC curve - GENIE3 Early Integration"
)

sgenie3_early_wadj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GENIE3 symmetrize output"
  )

# Threshold the weighted matrix with the project's cutoff_adjacency() helper.
sgenie3_early_adj <- cutoff_adjacency(
  count_matrices = early_matrix,
  weighted_adjm_list = sgenie3_early_wadj,
  ground.truth = adjm,
  n = 3,
  method = "GENIE3"
)
## Matrix 1 Mean 95th Percentile Cutoff: 0.005263737

sgenie3_early_adj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GENIE3 adjacency"
  )

# Performance scores; there is a single network, so no consensus step.
scores.genie3.early <- pscores(adjm, sgenie3_early_adj)
scores.genie3.early$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
plots <- plotg(sgenie3_early_adj)
ajm_compared <- compare_consensus(sgenie3_early_adj[[1]], adjm)
# ---- GRNBoost2, early integration ----
set.seed(1234)
tictoc::tic("GRNBoost2 early")
grnb_early <- infer_networks(early_matrix, method = "GRNBoost2")
# Elapsed time is toc - tic; `$toc` alone is only the end timestamp.
execution_times[["GRNBoost2 early"]] <- local({
  timing <- tictoc::toc(log = TRUE)
  unname(timing$toc - timing$tic)
})
## GRNBoost2 early: 36.237 sec elapsed
saveRDS(grnb_early, "./../analysis/grnb_early.RDS")

# Raw GRNBoost2 output.
grnb_early[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GRNBoost2 output"
  )

# Weighted adjacency matrix, symmetrized with the "mean" weight function,
# then ROC/AUC against the ground truth (all project helpers).
grnb_early_wadj <- generate_adjacency(grnb_early, ground.truth = adjm)
sgrnb_early_wadj <- symmetrize(grnb_early_wadj, weight_function = "mean")
grnb_early_auc <- plotROC(
  sgrnb_early_wadj, adjm,
  plot_title = "ROC curve - GRNBoost2 Early Integration"
)

# Fixed: the table captioned "symmetrize output" previously displayed the
# unsymmetrized matrix (grnb_early_wadj).
sgrnb_early_wadj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GRNBoost2 symmetrize output"
  )

# Threshold the weighted matrix with the project's cutoff_adjacency() helper.
sgrnb_early_adj <- cutoff_adjacency(
  count_matrices = early_matrix,
  weighted_adjm_list = sgrnb_early_wadj,
  ground.truth = adjm,
  n = 3,
  method = "GRNBoost2"
)
## Matrix 1 Mean 95th Percentile Cutoff: 2.552673

sgrnb_early_adj[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "GRNBoost2 adjacency"
  )

# Performance scores; there is a single network, so no consensus step.
scores.grn.early <- pscores(adjm, sgrnb_early_adj)
scores.grn.early$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
plots <- plotg(sgrnb_early_adj)
ajm_compared <- compare_consensus(sgrnb_early_adj[[1]], adjm)
# ---- ZILGM, early integration ----
# The result is stored in `zilgm_early`; it used to be assigned to
# `zilgm_late`, which silently replaced the late-integration result.
# Recorded console output below has been re-prefixed with `##` so the script
# parses.
set.seed(1234)
tictoc::tic("ZILGM early 15Cores")
zilgm_early <- infer_networks(
  count_matrices_list = early_matrix,
  method = "ZILGM",
  adjm = adjm
)
## learning for NBII graphical model
## nlambda : 50
## penalty function : LASSO
## update type : IRLS
## Conducting sampling in progress : 10 %
## Conducting sampling in progress : 20 %
## Conducting sampling in progress : 30 %
## Conducting sampling in progress : 40 %
## Conducting sampling in progress : 50 %
## Conducting sampling in progress : 60 %
## Conducting sampling in progress : 70 %
## Conducting sampling in progress : 80 %
## Conducting sampling in progress : 90 %
## Conducting sampling in progress : 100 %
## Assigned lamb to lambda_results[[ 1 ]] with 50 matrices.
saveRDS(zilgm_early, "./../analysis/zilgm_early.RDS")
# Elapsed time is toc - tic; `$toc` alone is only the end timestamp.
execution_times[["ZILGM early 15Cores"]] <- local({
  timing <- tictoc::toc(log = TRUE)
  unname(timing$toc - timing$tic)
})
## ZILGM early 15Cores: 8820.091 sec elapsed

# `est_graphs` and `lambdas` are reused names: from here on they refer to the
# early-integration result.
est_graphs <- zilgm_early$network_results
lambdas <- zilgm_early$lambda_results
zilgm_early_auc <- plotROC(
  lambdas[[1]], adjm,
  plot_title = "ROC ZILGM early",
  is_binary = TRUE
)

# Performance scores; there is a single network, so no consensus step.
scores.zilgm.early <- pscores(adjm, est_graphs)
scores.zilgm.early$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
ajm_compared <- compare_consensus(est_graphs[[1]], adjm)
#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
# ---- JRF (joint analysis) ----
# Run the JRF method on the three count matrices through infer_networks().
set.seed(1234)
tictoc::tic("JRF")
jrf_mat <- infer_networks(count_matrices, method = "JRF")
# Elapsed time is toc - tic; `$toc` alone is only the end timestamp.
execution_times[["JRF"]] <- local({
  timing <- tictoc::toc(log = TRUE)
  unname(timing$toc - timing$tic)
})
## JRF: 3680.345 sec elapsed

# Raw JRF output.
jrf_mat[[1]] %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "JRF output"
  )

# Permutation input: transpose each count matrix (genes become rows) and
# standardize it with the matrix-wide mean and standard deviation.
jrf_matrices <- lapply(count_matrices, t)
jrf_matrices_norm <- lapply(jrf_matrices, function(x) {
  (x - mean(x)) / sd(x)
})
genes <- rownames(jrf_matrices_norm[[1]])

# Permutation run from the JRF package: mtry = round(sqrt(p - 1)), 500 trees.
# NOTE(review): the trailing `5` is presumably the number of permutations --
# confirm against the JRF documentation.
set.seed(1234)
out.perm <- Run_permutation(
  jrf_matrices_norm,
  mtry = round(sqrt(length(genes) - 1)),
  ntree = 500,
  genes,
  5
)
# Select, for each class, the gene pairs whose importance score lies above a
# permutation-based FDR cut-off.
#
# Arguments:
#   out.jrf  data frame / matrix: columns 1-2 hold the gene pair, column
#            2 + k holds the importance score for class k.
#   out.perm 3-d array of permuted importance scores, indexed as
#            [score, permutation, class].
#   TH       FDR threshold.
#
# Returns a list with one element per class: the first two columns of
# `out.jrf` for the selected pairs.
#
# Scores are scanned from highest to lowest; the cut-off is the first score
# whose estimated FDR exceeds `TH`, and only pairs strictly above it are kept.
# If the FDR never exceeds `TH`, every pair passes the threshold and all pairs
# are returned (previously the cut-off stayed NULL and nothing was returned).
myJRF_network <- function(out.jrf, out.perm, TH) {
  nclasses <- dim(out.perm)[3]
  M <- dim(out.perm)[2]
  out <- vector("list", nclasses)
  for (net in seq_len(nclasses)) {
    importance <- out.jrf[, 2 + net]
    ranked <- sort(importance, decreasing = TRUE)
    perm_scores <- out.perm[, , net]
    # Default cut-off when no score reaches an FDR above TH: keep everything.
    th <- -Inf
    for (s in seq_along(ranked)) {
      # Average number of permuted scores at least as large as the s-th
      # ranked score, divided by the number of pairs selected so far.
      false_positives <- sum(perm_scores >= ranked[s]) / M
      fdr <- false_positives / s
      if (fdr > TH) {
        th <- ranked[s]
        break
      }
    }
    out[[net]] <- out.jrf[importance > th, seq(1, 2)]
  }
  out
}
# Gene pairs selected per class at an FDR threshold of 0.05, then printed.
mynet <- myJRF_network(out.jrf = jrf_mat[[1]], out.perm = out.perm, TH = 0.05)
print(mynet)
## [[1]]
## gene1 gene2
## 10653 PF4 CCR4
## 11011 CD247 CCR7
## 11680 ITGB2 CD14
## 12882 CD3D CD2
## 13733 FGR CD247
## 13788 ITGB2 CD247
## 15979 CD8A CD3D
## 16068 IL4 CD3D
## 35982 ITGB2 IL13
## 39419 SPI1 ITGB2
##
## [[2]]
## gene1 gene2
## 7591 MYO1F C5AR1
## 13727 FCGR2A CD247
## 13733 FGR CD247
## 23898 STAT5A CTLA4
## 24586 CXCR6 CXCL8
## 34499 SLC40A1 HAMP
##
## [[3]]
## gene1 gene2
## 11011 CD247 CCR7
## 12620 CD3E CD1D
## 13752 GZMH CD247
## 15979 CD8A CD3D
## 34310 PRF1 GZMK
## 34499 SLC40A1 HAMP
## 35035 IL1R1 ICAM1
## 35511 IL5RA IFNG
# Convert a list of gene-pair tables into binary, symmetric adjacency matrices.
#
# Arguments:
#   df_list  list of data frames with `gene1` and `gene2` columns.
#   adjm     reference adjacency matrix; its row names define the gene set and
#            its dimensions/dimnames are reused for every output matrix.
#
# Returns a list with one matrix per element of `df_list`. An entry is 1 where
# a listed pair links two genes and 0 elsewhere. Pairs with a gene missing
# from `rownames(adjm)` are skipped with a warning.
jrf_adj <- function(df_list, adjm) {
  known_genes <- rownames(adjm)
  result <- list()

  for (k in seq_along(df_list)) {
    edges <- df_list[[k]]
    # All-zero matrix with the same shape and dimnames as the reference.
    mat <- adjm * 0

    for (row_id in seq_len(nrow(edges))) {
      first <- edges$gene1[row_id]
      second <- edges$gene2[row_id]

      # Guard clause: report and skip pairs that cannot be placed.
      if (!(first %in% known_genes && second %in% known_genes)) {
        warning(paste("Gene pair not found in adjm:", first, "-", second))
        next
      }

      pos_first <- which(known_genes == first)
      pos_second <- which(known_genes == second)
      # Set both directions so the matrix stays symmetric.
      mat[pos_first, pos_second] <- 1
      mat[pos_second, pos_first] <- 1
    }

    result[[k]] <- mat
  }

  return(result)
}
# ---- JRF: networks from the permutation-based cut-off ----
# Binary adjacency matrices built from the selected gene pairs, then scored
# against the ground truth. `TRUE` replaces the reassignable shorthand `T`.
jrf_adj_mynet <- jrf_adj(mynet, adjm)
jrf_auc_perm <- plotROC(
  jrf_adj_mynet, adjm,
  plot_title = "ROC curve - JRF perm out",
  is_binary = TRUE
)

# Scores of each individual network.
scores.jrf.perm <- pscores(adjm, jrf_adj_mynet)
scores.jrf.perm$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
plots <- plotg(jrf_adj_mynet)

# Union consensus of the networks and its scores.
consesusu <- create_consensus(jrf_adj_mynet, method = "union")
scoresu.jrf.perm <- pscores(adjm, list(consesusu))
scoresu.jrf.perm$Statistics %>%
  datatable(
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"),
      scrollX = TRUE,
      pageLength = 10
    ),
    caption = "scores"
  )
ajm_compared <- compare_consensus(consesusu, adjm)
# ---- JRF: networks from the importance scores ----
# Split the JRF output into one (gene1, gene2, importance) table per
# importance column, so it can go through the same helpers as the other
# methods.
importance_columns <- grep("importance", names(jrf_mat[[1]]), value = TRUE)
jrf_list <- lapply(importance_columns, function(score_col) {
  # The subset already carries the importance column under its original name.
  jrf_mat[[1]][, c("gene1", "gene2", score_col)]
})
saveRDS(jrf_list, "./../analysis/jrf.RDS")

datatable(
  jrf_list[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "JRF output"
)

# Weighted adjacency matrices, symmetrized with the "mean" weight function,
# then ROC/AUC against the ground truth (all project helpers).
jrf_wadj <- generate_adjacency(jrf_list, ground.truth = adjm)
sjrf_wadj <- symmetrize(jrf_wadj, weight_function = "mean")
jrf_auc_mine <- plotROC(
  sjrf_wadj, adjm,
  plot_title = "ROC curve - JRF Late Integration"
)

datatable(
  sjrf_wadj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "JRF symmetrize output"
)

# Threshold the weighted matrices with the project's cutoff_adjacency() helper.
sjrf_adj <- cutoff_adjacency(
  count_matrices = count_matrices,
  weighted_adjm_list = sjrf_wadj,
  ground.truth = adjm,
  n = 3,
  method = "JRF"
)
## Matrix 1 Mean 95th Percentile Cutoff: 2.45886
## Matrix 2 Mean 95th Percentile Cutoff: 2.467922
## Matrix 3 Mean 95th Percentile Cutoff: 2.475093

datatable(
  sjrf_adj[[1]],
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "JRF adjacency"
)

# Scores of each individual network.
scores.jrf.all <- pscores(adjm, sjrf_adj)
datatable(
  scores.jrf.all$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores"
)
plots <- plotg(sjrf_adj)

# Consensus networks: vote, union and INet.
# NOTE(review): the INet threshold is 0.1 here but 0.05 for GENIE3 and
# GRNBoost2 -- confirm this is intentional.
consesusm <- create_consensus(sjrf_adj, method = "vote")
consesusu <- create_consensus(sjrf_adj, method = "union")
consesunet <- create_consensus(
  adj_matrix_list = sjrf_adj,
  weighted_list = sjrf_wadj,
  method = "INet",
  threshold = 0.1,
  ncores = 15
)
## [1] 0.3528009
## [1] 0.1028155
## [1] 0.0268126

scores.jrf <- pscores(adjm, list(consesusm))
scoresu.jrf <- pscores(adjm, list(consesusu))
scoresnet.jrf <- pscores(adjm, list(consesunet))

datatable(
  scores.jrf$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores vote"
)
datatable(
  scoresu.jrf$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores union"
)
datatable(
  scoresnet.jrf$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores INet"
)

# `ajm_compared` is overwritten on every call; only the last result is kept.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)
ajm_compared <- compare_consensus(consesunet, adjm)
# ---- PCzinb ----
# Estimate one graph per count matrix with PCzinb (method "zinb1",
# maxcard = 2). The step is timed but not stored in `execution_times`.
tictoc::tic()
est <- lapply(count_matrices, function(counts) {
  PCzinb(as.matrix(counts), method = "zinb1", maxcard = 2)
})
tictoc::toc()
## 71687.541 sec elapsed

# Label the estimated matrices with the ground-truth gene names.
est <- lapply(est, function(graph) {
  rownames(graph) <- rownames(adjm)
  colnames(graph) <- colnames(adjm)
  graph
})

# Vote and union consensus networks and their scores.
consesusm <- create_consensus(est, method = "vote")
consesusu <- create_consensus(est, method = "union")
scores.pc <- pscores(adjm, list(consesusm))
scoresu.pc <- pscores(adjm, list(consesusu))

datatable(
  scores.pc$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores vote"
)
datatable(
  scoresu.pc$Statistics,
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel"),
    scrollX = TRUE,
    pageLength = 10
  ),
  caption = "scores union"
)

# `ajm_compared` is overwritten on every call; only the last result is kept.
ajm_compared <- compare_consensus(consesusm, adjm)
ajm_compared <- compare_consensus(consesusu, adjm)
# ---- Execution-time bar chart ----
# One row per timed step; recorded seconds are converted to hours and minutes.
elapsed_seconds <- unlist(execution_times)
time_data <- data.frame(
  Method = names(execution_times),
  Time_in_Hours = elapsed_seconds / 3600
)
time_data$Time_in_Minutes <- time_data$Time_in_Hours * 60

# Sort by duration and freeze that order as the factor levels so the bars are
# drawn from fastest to slowest.
time_data <- time_data[order(time_data$Time_in_Hours), ]
time_data$Method <- factor(time_data$Method, levels = time_data$Method)

# Colour group derived from the method name contained in the step label.
time_data <- mutate(
  time_data,
  Method_Group = case_when(
    grepl("GENIE3", Method) ~ "GENIE3",
    grepl("GRNBoost2", Method) ~ "GRNBoost2",
    grepl("ZILGM", Method) ~ "ZILGM",
    grepl("JRF", Method) ~ "JRF"
  )
)
method_colors <- c(
  "GENIE3" = "darkblue",
  "GRNBoost2" = "darkgreen",
  "ZILGM" = "orange",
  "JRF" = "red"
)

# Bars show hours on the y axis; the text labels give the same time in minutes.
time_plot <- ggplot(
  time_data,
  aes(x = Method, y = Time_in_Hours, fill = Method_Group)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sprintf("%.1f min", Time_in_Minutes)), vjust = -0.5) +
  labs(title = "Execution Time for Each Method", y = "Time (Hours)", x = NULL) +
  scale_fill_manual(values = method_colors) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )
print(time_plot)
# ---- Performance comparison across methods ----
# One single-row data frame per metric, one column per method / integration
# strategy. Late-integration entries use the scores of the vote consensus.
mplot <- list()
for (k in c("TPR", "FPR", "Precision", "F1", "Accuracy")) {
  mplot[[k]] <- data.frame(
    GENIE3_late = scores.genie3.late$Statistics[[k]],
    GENIE3_early = scores.genie3.early$Statistics[[k]],
    GRNBoost2_late = scores.grn.late$Statistics[[k]],
    GRNBoost2_early = scores.grn.early$Statistics[[k]],
    ZILGM_late = scores.zilgm.late$Statistics[[k]],
    ZILGM_early = scores.zilgm.early$Statistics[[k]],
    JRF = scores.jrf$Statistics[[k]]
  )
}

# AUC per method. Results computed on several matrices are averaged so each
# column is a single value. JRF is now averaged like GENIE3 and GRNBoost2 late
# integration; without it, a multi-value AUC would expand the data frame to
# several rows and the `[1, ]` below would silently keep only the first.
# NOTE(review): assumes plotROC() returns one AUC per input matrix for JRF, as
# it does for the other late-integration calls -- confirm. The ZILGM AUCs come
# from `is_binary = TRUE` calls and are presumably single values -- confirm.
mplot[["AUC"]] <- data.frame(
  GENIE3_late = mean(genie3_late_auc$AUC),
  GENIE3_early = genie3_early_auc$AUC,
  GRNBoost2_late = mean(grnb_late_auc$AUC),
  GRNBoost2_early = grnb_early_auc$AUC,
  ZILGM_late = zilgm_late_auc$AUC,
  ZILGM_early = zilgm_early_auc$AUC,
  JRF = mean(jrf_auc_mine$AUC)
)

# Long format: one row per (metric, method) pair, using the first row of each
# metric table.
plot_data <- bind_rows(lapply(names(mplot), function(metric) {
  data.frame(
    Metric = metric,
    Method = names(mplot[[metric]]),
    Value = as.numeric(mplot[[metric]][1, ])
  )
}))

# Colour group per method and a fixed bar order on the x axis.
plot_data <- plot_data %>%
  mutate(Method_Group = case_when(
    grepl("GENIE3", Method) ~ "GENIE3",
    grepl("GRNBoost2", Method) ~ "GRNBoost2",
    grepl("ZILGM", Method) ~ "ZILGM",
    grepl("JRF", Method) ~ "JRF"
  )) %>%
  mutate(Method = factor(Method, levels = c(
    "GENIE3_early", "GENIE3_late",
    "GRNBoost2_early", "GRNBoost2_late",
    "ZILGM_early", "ZILGM_late",
    "JRF" # Ensure JRF is last
  )))

# One bar chart per metric. Only "Accuracy" and "AUC" (the bottom row of the
# final layout) keep their x-axis labels.
plots <- lapply(unique(plot_data$Metric), function(metric) {
  show_x_text <- metric %in% c("Accuracy", "AUC")
  ggplot(
    plot_data %>% filter(Metric == metric),
    aes(x = Method, y = Value, fill = Method_Group)
  ) +
    geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
    labs(title = metric, y = "Value", x = NULL) +
    scale_y_continuous(limits = c(0, 1)) +
    scale_fill_manual(values = method_colors) +
    theme_minimal() +
    theme(
      axis.text.x = if (show_x_text) {
        element_text(angle = 45, hjust = 1)
      } else {
        element_blank()
      },
      axis.title.x = element_blank(),
      legend.position = "none"
    )
})

# 3 x 2 patchwork layout. `& theme(...)` applies to every panel, so it
# replaces the per-panel `legend.position = "none"` set above.
final_plot <- (plots[[1]] | plots[[2]]) /
  (plots[[3]] | plots[[4]]) /
  (plots[[5]] | plots[[6]]) +
  plot_layout(guides = "collect") &
  theme(legend.position = "bottom")
print(final_plot)

# Record the R session (versions of R and of all loaded packages).
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=it_IT.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=it_IT.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=it_IT.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] doRNG_1.8.2 rngtools_1.5.2 INetTool_0.1.0 scales_1.2.0
## [5] patchwork_1.1.1 ZILGM_0.1.1 RColorBrewer_1.1-3 rsvg_2.6.1
## [9] DiagrammeRsvg_0.1 JRF_0.1-4 pROC_1.18.0 DiagrammeR_1.0.11
## [13] gridExtra_2.3 reshape2_1.4.4 rbenchmark_1.0.0 learn2count_0.3.2
## [17] reticulate_1.34.0 DT_0.22 forcats_0.5.1 stringr_1.4.0
## [21] dplyr_1.0.9 purrr_0.3.4 readr_2.1.2 tidyr_1.2.0
## [25] tibble_3.1.7 ggplot2_3.3.6 tidyverse_1.3.1 igraph_2.0.3
## [29] doParallel_1.0.17 iterators_1.0.14 foreach_1.5.2 GENIE3_1.16.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.0 backports_1.4.1
## [3] plyr_1.8.7 splines_4.1.0
## [5] crosstalk_1.2.0 GenomeInfoDb_1.30.1
## [7] digest_0.6.29 htmltools_0.5.2
## [9] fansi_1.0.3 magrittr_2.0.3
## [11] tzdb_0.3.0 modelr_0.1.8
## [13] matrixStats_0.62.0 colorspace_2.0-3
## [15] rvest_1.0.2 haven_2.5.0
## [17] xfun_0.30 crayon_1.5.1
## [19] RCurl_1.98-1.6 jsonlite_1.8.0
## [21] graph_1.72.0 survival_3.3-1
## [23] glue_1.6.2 gtable_0.3.0
## [25] zlibbioc_1.40.0 distributions3_0.2.2
## [27] XVector_0.34.0 DelayedArray_0.20.0
## [29] V8_6.0.0 car_3.0-13
## [31] shape_1.4.6 SingleCellExperiment_1.16.0
## [33] BiocGenerics_0.40.0 abind_1.4-5
## [35] pscl_1.5.9 DBI_1.1.2
## [37] rstatix_0.7.0 Rcpp_1.0.8.3
## [39] datastructures_0.2.9 stats4_4.1.0
## [41] glmnet_4.1-8 htmlwidgets_1.5.4
## [43] httr_1.4.3 WeightSVM_1.7-16
## [45] ellipsis_0.3.2 farver_2.1.0
## [47] pkgconfig_2.0.3 sass_0.4.1
## [49] dbplyr_2.1.1 utf8_1.2.2
## [51] labeling_0.4.2 tidyselect_1.1.2
## [53] rlang_1.1.4 munsell_0.5.0
## [55] cellranger_1.1.0 tools_4.1.0
## [57] visNetwork_2.1.2 cli_3.3.0
## [59] generics_0.1.2 statnet.common_4.10.0
## [61] broom_0.8.0 evaluate_0.15
## [63] fastmap_1.1.0 yaml_2.3.5
## [65] bst_0.3-24 knitr_1.39
## [67] fs_1.5.2 caTools_1.18.2
## [69] tictoc_1.2.1 xml2_1.3.3
## [71] mpath_0.4-2.26 multinet_4.2.1
## [73] compiler_4.1.0 rstudioapi_0.13
## [75] curl_4.3.2 png_0.1-7
## [77] ggsignif_0.6.3 reprex_2.0.1
## [79] bslib_0.3.1 stringi_1.7.6
## [81] highr_0.9 lattice_0.20-45
## [83] iZID_0.0.1 Matrix_1.6-1.1
## [85] gbm_2.2.2 vctrs_0.4.1
## [87] pillar_1.7.0 lifecycle_1.0.1
## [89] jquerylib_0.1.4 bitops_1.0-7
## [91] GenomicRanges_1.46.1 R6_2.5.1
## [93] network_1.18.2 IRanges_2.28.0
## [95] codetools_0.2-18 MASS_7.3-57
## [97] assertthat_0.2.1 SummarizedExperiment_1.24.0
## [99] withr_2.5.0 S4Vectors_0.32.4
## [101] GenomeInfoDbData_1.2.7 hms_1.1.1
## [103] grid_4.1.0 rpart_4.1.16
## [105] coda_0.19-4 flux_0.3-0.1
## [107] rmarkdown_2.14 MatrixGenerics_1.6.0
## [109] carData_3.0-5 ggpubr_0.4.0
## [111] numDeriv_2016.8-1.1 Biobase_2.54.0
## [113] lubridate_1.8.0